{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"gpuType":"T4"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"},"accelerator":"GPU","widgets":{"application/vnd.jupyter.widget-state+json":{"52c4bf7418f74bc79a8c12fe35901974":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_c5e609d111b34d408a53a4cd71bb43d5","IPY_MODEL_0e0a20b5ed7a44e9834022e7eba2194d","IPY_MODEL_b5627331e78e4eb28765ed20f32cf403"],"layout":"IPY_MODEL_8084d4cb267f4a52b3d80ec34d291190"}},"c5e609d111b34d408a53a4cd71bb43d5":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_a8dcdf1f7ab64242acb057e8b54ebf79","placeholder":"​","style":"IPY_MODEL_1ca492fddbaa4ea7a3226649154e01fd","value":"Loading checkpoint shards: 
100%"}},"0e0a20b5ed7a44e9834022e7eba2194d":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_a8eda8bfe08e4152a80c63830138c96d","max":2,"min":0,"orientation":"horizontal","style":"IPY_MODEL_1f258eacd6d0472385d41523b65dea8b","value":2}},"b5627331e78e4eb28765ed20f32cf403":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_228b1bcf604f454f8060a250b58008a1","placeholder":"​","style":"IPY_MODEL_90b281e9c5ed4e77ab93e5879d0b15a3","value":" 2/2 [01:13&lt;00:00, 
33.04s/it]"}},"8084d4cb267f4a52b3d80ec34d291190":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"a8dcdf1f7ab64242acb057e8b54ebf79":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,
"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"1ca492fddbaa4ea7a3226649154e01fd":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"a8eda8bfe08e4152a80c63830138c96d":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"1f258eacd6d0472385d41523b65dea8b":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"228b1bcf604f454f8060a250b58008a1":{"model_
module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"90b281e9c5ed4e77ab93e5879d0b15a3":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"051d193cd87f47c1971fb87544e1e615":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_9d7247c119e642c5894f15ca6974ef3e","IPY_MODEL_a79c22bb34ec4f698a00752b47a6f631","IPY_MODEL_d95f3a3f26c6470d984542cdfd68bec1"],"layout":"IPY_MODEL_343e11c62a59448eb43bbc0c31bf5f11
"}},"9d7247c119e642c5894f15ca6974ef3e":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_a153c96bd1fe4c48a41e9b9c7c00dd6e","placeholder":"​","style":"IPY_MODEL_84da055d24694320843e13ad37438792","value":"Loading checkpoint shards: 100%"}},"a79c22bb34ec4f698a00752b47a6f631":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_e375632975904402baea46163e2eeca1","max":2,"min":0,"orientation":"horizontal","style":"IPY_MODEL_95501d0b5a22407288f008bf8cc69726","value":2}},"d95f3a3f26c6470d984542cdfd68bec1":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_6aef866a6c474dfabb2140ded933c5aa","placeholder":"​","style":"IPY_MODEL_d66fa096d442423c9447cbfbdc1aad8d","value":" 2/2 [00:59&lt;00:00, 
27.43s/it]"}},"343e11c62a59448eb43bbc0c31bf5f11":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"a153c96bd1fe4c48a41e9b9c7c00dd6e":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,
"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"84da055d24694320843e13ad37438792":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"e375632975904402baea46163e2eeca1":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"95501d0b5a22407288f008bf8cc69726":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"6aef866a6c474dfabb2140ded933c5aa":{"model_
module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"d66fa096d442423c9447cbfbdc1aad8d":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"c99aff4cfd664ae8a165a27bea0566c8":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_e4b64cab6b7b418c8a2575ee26839039","IPY_MODEL_c3a4fedc73b3480089ef9d13381471ed","IPY_MODEL_bf722f71c61b4285bcbbf32fd619b3a6"],"layout":"IPY_MODEL_fd11a6148b704c5b9142c5e8de2d3b25
"}},"e4b64cab6b7b418c8a2575ee26839039":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_f0bcdaf940d14ad796fc7ac46c8e1e64","placeholder":"​","style":"IPY_MODEL_b6e821c974674f2290c354238d6c919c","value":"Upload 2 LFS files: 100%"}},"c3a4fedc73b3480089ef9d13381471ed":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_eeba50e8242c4753bfc0ea48e03f9078","max":2,"min":0,"orientation":"horizontal","style":"IPY_MODEL_7a1f3340688d408092adade75f4baac4","value":2}},"bf722f71c61b4285bcbbf32fd619b3a6":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_8c887ca9b0eb44fdb8608bf36b5db5c5","placeholder":"​","style":"IPY_MODEL_e4698337e6b843afac706ab657ca6af9","value":" 2/2 [06:36&lt;00:00, 
396.47s/it]"}},"fd11a6148b704c5b9142c5e8de2d3b25":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"f0bcdaf940d14ad796fc7ac46c8e1e64":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null
,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"b6e821c974674f2290c354238d6c919c":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"eeba50e8242c4753bfc0ea48e03f9078":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"7a1f3340688d408092adade75f4baac4":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"8c887ca9b0eb44fdb8608bf36b5db5c5":{"model
_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"e4698337e6b843afac706ab657ca6af9":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"1af01f1f1aac42b8bff46fe4df8a59ad":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_eee8731f316244eda5ff0765fd12bf85","IPY_MODEL_f135278e410f4b708435bb80fb630bcf","IPY_MODEL_2e6fc79bf5c149d6b0bc5c52e18debc7"],"layout":"IPY_MODEL_a4b0debc025444a59abd6953b3512c0
d"}},"eee8731f316244eda5ff0765fd12bf85":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_130120644beb48acbc038651459af43c","placeholder":"​","style":"IPY_MODEL_bf77e97593a349718bdb5fd9bfd28fe3","value":"pytorch_model-00001-of-00002.bin: 100%"}},"f135278e410f4b708435bb80fb630bcf":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_f7292741953e47699540ef8712fc0d8d","max":9976637886,"min":0,"orientation":"horizontal","style":"IPY_MODEL_9434350b1b9c4060812feb9ecbf63278","value":9976637886}},"2e6fc79bf5c149d6b0bc5c52e18debc7":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_b29647e268414329be56047e522e28b9","placeholder":"​","style":"IPY_MODEL_27bb18a199ca47108c7a61e9c443de36","value":" 9.98G/9.98G [06:35&lt;00:00, 
25.8MB/s]"}},"a4b0debc025444a59abd6953b3512c0d":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"130120644beb48acbc038651459af43c":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"
overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"bf77e97593a349718bdb5fd9bfd28fe3":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"f7292741953e47699540ef8712fc0d8d":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"9434350b1b9c4060812feb9ecbf63278":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"b29647e268414329be56047e522e28b9":{"model_m
odule":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"27bb18a199ca47108c7a61e9c443de36":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"33ebb868f3e846f6af1a1a2a8ad6a3cb":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_1f73f8b4d4da4e74adc135f2a2f6ee65","IPY_MODEL_68da6e6e69c8419895bea2068760534e","IPY_MODEL_6dc1a868e08c4c3b8315116d2c46573b"],"layout":"IPY_MODEL_7a5d714c17374104bb6f5caaa5541c10"
}},"1f73f8b4d4da4e74adc135f2a2f6ee65":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_1b6c59a51359453c926bfcddb3d0f0ea","placeholder":"​","style":"IPY_MODEL_dac3669f18284161a58d52f26dffb761","value":"pytorch_model-00002-of-00002.bin: 100%"}},"68da6e6e69c8419895bea2068760534e":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_a3511f489f6d47cc8d404ab6f367b29f","max":3500316627,"min":0,"orientation":"horizontal","style":"IPY_MODEL_20670478612f4b1a8a5f23d71a2609a7","value":3500316627}},"6dc1a868e08c4c3b8315116d2c46573b":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_b463153ec04749e38540389efa2981f7","placeholder":"​","style":"IPY_MODEL_2bb3d36d248a48fba364f14d9e840306","value":" 3.50G/3.50G [02:27&lt;00:00, 
26.4MB/s]"}},"7a5d714c17374104bb6f5caaa5541c10":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"1b6c59a51359453c926bfcddb3d0f0ea":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"
overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"dac3669f18284161a58d52f26dffb761":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"a3511f489f6d47cc8d404ab6f367b29f":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"20670478612f4b1a8a5f23d71a2609a7":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"b463153ec04749e38540389efa2981f7":{"model_m
odule":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"2bb3d36d248a48fba364f14d9e840306":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}}}}},"cells":[{"cell_type":"markdown","metadata":{"id":"view-in-github"},"source":["<a href=\"https://colab.research.google.com/github/mlabonne/llm-course/blob/main/Fine_tune_Llama_2_in_Google_Colab.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"]},{"cell_type":"markdown","source":["# Fine-tune Llama 2 in Google Colab\n","> 🗣️ Large Language Model Course\n","\n","❤️ Created by [@maximelabonne](https://twitter.com/maximelabonne), based on Younes Belkada's [GitHub 
Gist](https://gist.github.com/younesbelkada/9f7f75c94bdc1981c8ca5cc937d4a4da). Special thanks to Tolga HOŞGÖR for his solution to empty the VRAM.\n","\n","This notebook runs on a T4 GPU. (Last update: 01 Aug 2023)\n"],"metadata":{"id":"OSHlAbqzDFDq"}},{"cell_type":"code","execution_count":null,"metadata":{"id":"GLXwJqbjtPho"},"outputs":[],"source":["!pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 transformers==4.31.0 trl==0.4.7"]},{"cell_type":"code","source":["import os\n","import torch\n","from datasets import load_dataset\n","from transformers import (\n","    AutoModelForCausalLM,\n","    AutoTokenizer,\n","    BitsAndBytesConfig,\n","    HfArgumentParser,\n","    TrainingArguments,\n","    pipeline,\n","    logging,\n",")\n","from peft import LoraConfig, PeftModel\n","from trl import SFTTrainer"],"metadata":{"id":"nAMzy_0FtaUZ"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# The model that you want to train from the Hugging Face hub\n","model_name = \"NousResearch/Llama-2-7b-chat-hf\"\n","\n","# The instruction dataset to use\n","dataset_name = \"mlabonne/guanaco-llama2-1k\"\n","\n","# Fine-tuned model name\n","new_model = \"llama-2-7b-miniguanaco\"\n","\n","################################################################################\n","# QLoRA parameters\n","################################################################################\n","\n","# LoRA attention dimension\n","lora_r = 64\n","\n","# Alpha parameter for LoRA scaling\n","lora_alpha = 16\n","\n","# Dropout probability for LoRA layers\n","lora_dropout = 0.1\n","\n","################################################################################\n","# bitsandbytes parameters\n","################################################################################\n","\n","# Activate 4-bit precision base model loading\n","use_4bit = True\n","\n","# Compute dtype for 4-bit base models\n","bnb_4bit_compute_dtype = \"float16\"\n","\n","# Quantization type 
(fp4 or nf4)\n","bnb_4bit_quant_type = \"nf4\"\n","\n","# Activate nested quantization for 4-bit base models (double quantization)\n","use_nested_quant = False\n","\n","################################################################################\n","# TrainingArguments parameters\n","################################################################################\n","\n","# Output directory where the model predictions and checkpoints will be stored\n","output_dir = \"./results\"\n","\n","# Number of training epochs\n","num_train_epochs = 1\n","\n","# Enable fp16/bf16 training (set bf16 to True with an A100)\n","fp16 = False\n","bf16 = False\n","\n","# Batch size per GPU for training\n","per_device_train_batch_size = 4\n","\n","# Batch size per GPU for evaluation\n","per_device_eval_batch_size = 4\n","\n","# Number of update steps to accumulate the gradients for\n","gradient_accumulation_steps = 1\n","\n","# Enable gradient checkpointing\n","gradient_checkpointing = True\n","\n","# Maximum gradient norm (gradient clipping)\n","max_grad_norm = 0.3\n","\n","# Initial learning rate (AdamW optimizer)\n","learning_rate = 2e-4\n","\n","# Weight decay to apply to all layers except bias/LayerNorm weights\n","weight_decay = 0.001\n","\n","# Optimizer to use\n","optim = \"paged_adamw_32bit\"\n","\n","# Learning rate schedule\n","lr_scheduler_type = \"cosine\"\n","\n","# Number of training steps (overrides num_train_epochs)\n","max_steps = -1\n","\n","# Ratio of steps for a linear warmup (from 0 to learning rate)\n","warmup_ratio = 0.03\n","\n","# Group sequences into batches with the same length\n","# Saves memory and speeds up training considerably\n","group_by_length = True\n","\n","# Save checkpoint every X update steps\n","save_steps = 0\n","\n","# Log every X update steps\n","logging_steps = 25\n","\n","################################################################################\n","# SFT 
parameters\n","################################################################################\n","\n","# Maximum sequence length to use\n","max_seq_length = None\n","\n","# Pack multiple short examples in the same input sequence to increase efficiency\n","packing = False\n","\n","# Load the entire model on the GPU 0\n","device_map = {\"\": 0}"],"metadata":{"id":"ib_We3NLtj2E"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# Load dataset (you can process it here)\n","\n","# The instruction dataset to use\n","dataset_name = \"mlabonne/guanaco-llama2-1k\"\n","dataset = load_dataset(dataset_name, split=\"train\")\n","\n","# Load tokenizer and model with QLoRA configuration\n","compute_dtype = getattr(torch, bnb_4bit_compute_dtype)\n","\n","bnb_config = BitsAndBytesConfig(\n","    load_in_4bit=use_4bit,\n","    bnb_4bit_quant_type=bnb_4bit_quant_type,\n","    bnb_4bit_compute_dtype=compute_dtype,\n","    bnb_4bit_use_double_quant=use_nested_quant,\n",")\n","\n","# Check GPU compatibility with bfloat16\n","if compute_dtype == torch.float16 and use_4bit:\n","    major, _ = torch.cuda.get_device_capability()\n","    if major >= 8:\n","        print(\"=\" * 80)\n","        print(\"Your GPU supports bfloat16: accelerate training with bf16=True\")\n","        print(\"=\" * 80)\n","\n","# Load base model\n","model = AutoModelForCausalLM.from_pretrained(\n","    model_name,\n","    quantization_config=bnb_config,\n","    device_map=device_map\n",")\n","model.config.use_cache = False\n","model.config.pretraining_tp = 1\n","\n","# Load LLaMA tokenizer\n","tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)\n","tokenizer.pad_token = tokenizer.eos_token\n","tokenizer.padding_side = \"right\" # Fix weird overflow issue with fp16 training\n","\n","# Load LoRA configuration\n","peft_config = LoraConfig(\n","    lora_alpha=lora_alpha,\n","    lora_dropout=lora_dropout,\n","    r=lora_r,\n","    bias=\"none\",\n","    
task_type=\"CAUSAL_LM\",\n",")\n","\n","# Set training parameters\n","training_arguments = TrainingArguments(\n","    output_dir=output_dir,\n","    num_train_epochs=num_train_epochs,\n","    per_device_train_batch_size=per_device_train_batch_size,\n","    gradient_accumulation_steps=gradient_accumulation_steps,\n","    optim=optim,\n","    save_steps=save_steps,\n","    logging_steps=logging_steps,\n","    learning_rate=learning_rate,\n","    weight_decay=weight_decay,\n","    fp16=fp16,\n","    bf16=bf16,\n","    max_grad_norm=max_grad_norm,\n","    max_steps=max_steps,\n","    warmup_ratio=warmup_ratio,\n","    group_by_length=group_by_length,\n","    lr_scheduler_type=lr_scheduler_type,\n","    report_to=\"tensorboard\"\n",")\n","\n","# Set supervised fine-tuning parameters\n","trainer = SFTTrainer(\n","    model=model,\n","    train_dataset=dataset,\n","    peft_config=peft_config,\n","    dataset_text_field=\"text\",\n","    max_seq_length=max_seq_length,\n","    tokenizer=tokenizer,\n","    args=training_arguments,\n","    packing=packing,\n",")\n","\n","# Train model\n","trainer.train()\n","\n","# Save trained model\n","trainer.model.save_pretrained(new_model)"],"metadata":{"id":"OJXpOgBFuSrc","colab":{"base_uri":"https://localhost:8080/","height":1000,"referenced_widgets":["52c4bf7418f74bc79a8c12fe35901974","c5e609d111b34d408a53a4cd71bb43d5","0e0a20b5ed7a44e9834022e7eba2194d","b5627331e78e4eb28765ed20f32cf403","8084d4cb267f4a52b3d80ec34d291190","a8dcdf1f7ab64242acb057e8b54ebf79","1ca492fddbaa4ea7a3226649154e01fd","a8eda8bfe08e4152a80c63830138c96d","1f258eacd6d0472385d41523b65dea8b","228b1bcf604f454f8060a250b58008a1","90b281e9c5ed4e77ab93e5879d0b15a3"]},"outputId":"8d06ed40-ea32-4d85-8665-413bde069607"},"execution_count":null,"outputs":[{"output_type":"display_data","data":{"text/plain":["Loading checkpoint shards:   0%|          | 0/2 [00:00<?, 
?it/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"52c4bf7418f74bc79a8c12fe35901974"}},"metadata":{}},{"output_type":"stream","name":"stderr","text":["/usr/local/lib/python3.10/dist-packages/peft/utils/other.py:102: FutureWarning: prepare_model_for_int8_training is deprecated and will be removed in a future version. Use prepare_model_for_kbit_training instead.\n","  warnings.warn(\n","/usr/local/lib/python3.10/dist-packages/trl/trainer/sft_trainer.py:159: UserWarning: You didn't pass a `max_seq_length` argument to the SFTTrainer, this will default to 1024\n","  warnings.warn(\n","You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"]},{"output_type":"display_data","data":{"text/plain":["<IPython.core.display.HTML object>"],"text/html":["\n","    <div>\n","      \n","      <progress value='250' max='250' style='width:300px; height:20px; vertical-align: middle;'></progress>\n","      [250/250 24:05, Epoch 1/1]\n","    </div>\n","    <table border=\"1\" class=\"dataframe\">\n","  <thead>\n"," <tr style=\"text-align: left;\">\n","      <th>Step</th>\n","      <th>Training Loss</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <td>1</td>\n","      <td>1.350100</td>\n","    </tr>\n","    <tr>\n","      <td>2</td>\n","      <td>2.015800</td>\n","    </tr>\n","    <tr>\n","      <td>3</td>\n","      <td>1.048700</td>\n","    </tr>\n","    <tr>\n","      <td>4</td>\n","      <td>1.287700</td>\n","    </tr>\n","    <tr>\n","      <td>5</td>\n","      <td>1.451200</td>\n","    </tr>\n","    <tr>\n","      <td>6</td>\n","      <td>1.659900</td>\n","    </tr>\n","    <tr>\n","      <td>7</td>\n","      <td>1.472300</td>\n","    </tr>\n","    <tr>\n","      <td>8</td>\n","      <td>1.326700</td>\n","    </tr>\n","    <tr>\n","      
<td>9</td>\n","      <td>1.140000</td>\n","    </tr>\n","    <tr>\n","      <td>10</td>\n","      <td>1.395300</td>\n","    </tr>\n","    <tr>\n","      <td>11</td>\n","      <td>1.776400</td>\n","    </tr>\n","    <tr>\n","      <td>12</td>\n","      <td>1.169100</td>\n","    </tr>\n","    <tr>\n","      <td>13</td>\n","      <td>1.434700</td>\n","    </tr>\n","    <tr>\n","      <td>14</td>\n","      <td>1.550400</td>\n","    </tr>\n","    <tr>\n","      <td>15</td>\n","      <td>1.440400</td>\n","    </tr>\n","    <tr>\n","      <td>16</td>\n","      <td>1.352100</td>\n","    </tr>\n","    <tr>\n","      <td>17</td>\n","      <td>1.062800</td>\n","    </tr>\n","    <tr>\n","      <td>18</td>\n","      <td>1.173400</td>\n","    </tr>\n","    <tr>\n","      <td>19</td>\n","      <td>1.385300</td>\n","    </tr>\n","    <tr>\n","      <td>20</td>\n","      <td>1.433300</td>\n","    </tr>\n","    <tr>\n","      <td>21</td>\n","      <td>1.787800</td>\n","    </tr>\n","    <tr>\n","      <td>22</td>\n","      <td>1.600200</td>\n","    </tr>\n","    <tr>\n","      <td>23</td>\n","      <td>1.067800</td>\n","    </tr>\n","    <tr>\n","      <td>24</td>\n","      <td>1.679300</td>\n","    </tr>\n","    <tr>\n","      <td>25</td>\n","      <td>1.209900</td>\n","    </tr>\n","    <tr>\n","      <td>26</td>\n","      <td>1.305200</td>\n","    </tr>\n","    <tr>\n","      <td>27</td>\n","      <td>1.465300</td>\n","    </tr>\n","    <tr>\n","      <td>28</td>\n","      <td>1.781800</td>\n","    </tr>\n","    <tr>\n","      <td>29</td>\n","      <td>1.152400</td>\n","    </tr>\n","    <tr>\n","      <td>30</td>\n","      <td>1.434400</td>\n","    </tr>\n","    <tr>\n","      <td>31</td>\n","      <td>1.399300</td>\n","    </tr>\n","    <tr>\n","      <td>32</td>\n","      <td>1.796300</td>\n","    </tr>\n","    <tr>\n","      <td>33</td>\n","      <td>1.674500</td>\n","    </tr>\n","    <tr>\n","      <td>34</td>\n","      <td>1.567600</td>\n","    </tr>\n","    <tr>\n","     
 <td>35</td>\n","      <td>1.830000</td>\n","    </tr>\n","    <tr>\n","      <td>36</td>\n","      <td>1.720200</td>\n","    </tr>\n","    <tr>\n","      <td>37</td>\n","      <td>1.335800</td>\n","    </tr>\n","    <tr>\n","      <td>38</td>\n","      <td>1.333000</td>\n","    </tr>\n","    <tr>\n","      <td>39</td>\n","      <td>2.044900</td>\n","    </tr>\n","    <tr>\n","      <td>40</td>\n","      <td>1.832200</td>\n","    </tr>\n","    <tr>\n","      <td>41</td>\n","      <td>1.533900</td>\n","    </tr>\n","    <tr>\n","      <td>42</td>\n","      <td>1.259900</td>\n","    </tr>\n","    <tr>\n","      <td>43</td>\n","      <td>1.372300</td>\n","    </tr>\n","    <tr>\n","      <td>44</td>\n","      <td>1.551600</td>\n","    </tr>\n","    <tr>\n","      <td>45</td>\n","      <td>2.002400</td>\n","    </tr>\n","    <tr>\n","      <td>46</td>\n","      <td>1.956100</td>\n","    </tr>\n","    <tr>\n","      <td>47</td>\n","      <td>2.441900</td>\n","    </tr>\n","    <tr>\n","      <td>48</td>\n","      <td>2.289100</td>\n","    </tr>\n","    <tr>\n","      <td>49</td>\n","      <td>1.544500</td>\n","    </tr>\n","    <tr>\n","      <td>50</td>\n","      <td>2.040300</td>\n","    </tr>\n","    <tr>\n","      <td>51</td>\n","      <td>1.103800</td>\n","    </tr>\n","    <tr>\n","      <td>52</td>\n","      <td>1.630800</td>\n","    </tr>\n","    <tr>\n","      <td>53</td>\n","      <td>1.437900</td>\n","    </tr>\n","    <tr>\n","      <td>54</td>\n","      <td>1.820900</td>\n","    </tr>\n","    <tr>\n","      <td>55</td>\n","      <td>1.080300</td>\n","    </tr>\n","    <tr>\n","      <td>56</td>\n","      <td>1.029200</td>\n","    </tr>\n","    <tr>\n","      <td>57</td>\n","      <td>0.999400</td>\n","    </tr>\n","    <tr>\n","      <td>58</td>\n","      <td>0.795900</td>\n","    </tr>\n","    <tr>\n","      <td>59</td>\n","      <td>1.331600</td>\n","    </tr>\n","    <tr>\n","      <td>60</td>\n","      <td>1.099500</td>\n","    </tr>\n","    <tr>\n","   
   <td>61</td>\n","      <td>1.199000</td>\n","    </tr>\n","    <tr>\n","      <td>62</td>\n","      <td>1.146000</td>\n","    </tr>\n","    <tr>\n","      <td>63</td>\n","      <td>1.129000</td>\n","    </tr>\n","    <tr>\n","      <td>64</td>\n","      <td>1.109500</td>\n","    </tr>\n","    <tr>\n","      <td>65</td>\n","      <td>1.207000</td>\n","    </tr>\n","    <tr>\n","      <td>66</td>\n","      <td>1.360600</td>\n","    </tr>\n","    <tr>\n","      <td>67</td>\n","      <td>1.879000</td>\n","    </tr>\n","    <tr>\n","      <td>68</td>\n","      <td>1.317200</td>\n","    </tr>\n","    <tr>\n","      <td>69</td>\n","      <td>1.033300</td>\n","    </tr>\n","    <tr>\n","      <td>70</td>\n","      <td>1.153400</td>\n","    </tr>\n","    <tr>\n","      <td>71</td>\n","      <td>1.112400</td>\n","    </tr>\n","    <tr>\n","      <td>72</td>\n","      <td>1.218400</td>\n","    </tr>\n","    <tr>\n","      <td>73</td>\n","      <td>1.134600</td>\n","    </tr>\n","    <tr>\n","      <td>74</td>\n","      <td>1.053200</td>\n","    </tr>\n","    <tr>\n","      <td>75</td>\n","      <td>1.008900</td>\n","    </tr>\n","    <tr>\n","      <td>76</td>\n","      <td>1.077000</td>\n","    </tr>\n","    <tr>\n","      <td>77</td>\n","      <td>1.245000</td>\n","    </tr>\n","    <tr>\n","      <td>78</td>\n","      <td>1.395900</td>\n","    </tr>\n","    <tr>\n","      <td>79</td>\n","      <td>1.488800</td>\n","    </tr>\n","    <tr>\n","      <td>80</td>\n","      <td>1.382500</td>\n","    </tr>\n","    <tr>\n","      <td>81</td>\n","      <td>1.442200</td>\n","    </tr>\n","    <tr>\n","      <td>82</td>\n","      <td>1.028500</td>\n","    </tr>\n","    <tr>\n","      <td>83</td>\n","      <td>1.208500</td>\n","    </tr>\n","    <tr>\n","      <td>84</td>\n","      <td>1.780200</td>\n","    </tr>\n","    <tr>\n","      <td>85</td>\n","      <td>1.679300</td>\n","    </tr>\n","    <tr>\n","      <td>86</td>\n","      <td>1.276600</td>\n","    </tr>\n","    <tr>\n"," 
     <td>87</td>\n","      <td>1.374600</td>\n","    </tr>\n","    <tr>\n","      <td>88</td>\n","      <td>1.490000</td>\n","    </tr>\n","    <tr>\n","      <td>89</td>\n","      <td>1.567100</td>\n","    </tr>\n","    <tr>\n","      <td>90</td>\n","      <td>1.435000</td>\n","    </tr>\n","    <tr>\n","      <td>91</td>\n","      <td>1.329800</td>\n","    </tr>\n","    <tr>\n","      <td>92</td>\n","      <td>1.387600</td>\n","    </tr>\n","    <tr>\n","      <td>93</td>\n","      <td>0.971400</td>\n","    </tr>\n","    <tr>\n","      <td>94</td>\n","      <td>1.293800</td>\n","    </tr>\n","    <tr>\n","      <td>95</td>\n","      <td>1.585900</td>\n","    </tr>\n","    <tr>\n","      <td>96</td>\n","      <td>1.431700</td>\n","    </tr>\n","    <tr>\n","      <td>97</td>\n","      <td>1.948900</td>\n","    </tr>\n","    <tr>\n","      <td>98</td>\n","      <td>1.630500</td>\n","    </tr>\n","    <tr>\n","      <td>99</td>\n","      <td>1.839100</td>\n","    </tr>\n","    <tr>\n","      <td>100</td>\n","      <td>1.740900</td>\n","    </tr>\n","    <tr>\n","      <td>101</td>\n","      <td>0.717200</td>\n","    </tr>\n","    <tr>\n","      <td>102</td>\n","      <td>0.958100</td>\n","    </tr>\n","    <tr>\n","      <td>103</td>\n","      <td>1.625900</td>\n","    </tr>\n","    <tr>\n","      <td>104</td>\n","      <td>1.150000</td>\n","    </tr>\n","    <tr>\n","      <td>105</td>\n","      <td>0.999200</td>\n","    </tr>\n","    <tr>\n","      <td>106</td>\n","      <td>1.253100</td>\n","    </tr>\n","    <tr>\n","      <td>107</td>\n","      <td>1.007600</td>\n","    </tr>\n","    <tr>\n","      <td>108</td>\n","      <td>1.049700</td>\n","    </tr>\n","    <tr>\n","      <td>109</td>\n","      <td>1.265900</td>\n","    </tr>\n","    <tr>\n","      <td>110</td>\n","      <td>1.251300</td>\n","    </tr>\n","    <tr>\n","      <td>111</td>\n","      <td>1.109500</td>\n","    </tr>\n","    <tr>\n","      <td>112</td>\n","      <td>1.652500</td>\n","    
</tr>\n","    <tr>\n","      <td>113</td>\n","      <td>1.238000</td>\n","    </tr>\n","    <tr>\n","      <td>114</td>\n","      <td>1.521300</td>\n","    </tr>\n","    <tr>\n","      <td>115</td>\n","      <td>1.002400</td>\n","    </tr>\n","    <tr>\n","      <td>116</td>\n","      <td>0.982400</td>\n","    </tr>\n","    <tr>\n","      <td>117</td>\n","      <td>1.389300</td>\n","    </tr>\n","    <tr>\n","      <td>118</td>\n","      <td>1.114900</td>\n","    </tr>\n","    <tr>\n","      <td>119</td>\n","      <td>1.093900</td>\n","    </tr>\n","    <tr>\n","      <td>120</td>\n","      <td>1.254200</td>\n","    </tr>\n","    <tr>\n","      <td>121</td>\n","      <td>1.132300</td>\n","    </tr>\n","    <tr>\n","      <td>122</td>\n","      <td>0.925300</td>\n","    </tr>\n","    <tr>\n","      <td>123</td>\n","      <td>1.292700</td>\n","    </tr>\n","    <tr>\n","      <td>124</td>\n","      <td>1.317600</td>\n","    </tr>\n","    <tr>\n","      <td>125</td>\n","      <td>1.080400</td>\n","    </tr>\n","    <tr>\n","      <td>126</td>\n","      <td>0.918800</td>\n","    </tr>\n","    <tr>\n","      <td>127</td>\n","      <td>1.203400</td>\n","    </tr>\n","    <tr>\n","      <td>128</td>\n","      <td>1.098800</td>\n","    </tr>\n","    <tr>\n","      <td>129</td>\n","      <td>1.360800</td>\n","    </tr>\n","    <tr>\n","      <td>130</td>\n","      <td>1.256900</td>\n","    </tr>\n","    <tr>\n","      <td>131</td>\n","      <td>1.392600</td>\n","    </tr>\n","    <tr>\n","      <td>132</td>\n","      <td>1.167600</td>\n","    </tr>\n","    <tr>\n","      <td>133</td>\n","      <td>1.134900</td>\n","    </tr>\n","    <tr>\n","      <td>134</td>\n","      <td>1.423700</td>\n","    </tr>\n","    <tr>\n","      <td>135</td>\n","      <td>1.111200</td>\n","    </tr>\n","    <tr>\n","      <td>136</td>\n","      <td>1.081600</td>\n","    </tr>\n","    <tr>\n","      <td>137</td>\n","      <td>1.806000</td>\n","    </tr>\n","    <tr>\n","      <td>138</td>\n","    
  <td>1.238800</td>\n","    </tr>\n","    <tr>\n","      <td>139</td>\n","      <td>1.306800</td>\n","    </tr>\n","    <tr>\n","      <td>140</td>\n","      <td>1.421900</td>\n","    </tr>\n","    <tr>\n","      <td>141</td>\n","      <td>1.467300</td>\n","    </tr>\n","    <tr>\n","      <td>142</td>\n","      <td>1.245100</td>\n","    </tr>\n","    <tr>\n","      <td>143</td>\n","      <td>1.594200</td>\n","    </tr>\n","    <tr>\n","      <td>144</td>\n","      <td>1.426000</td>\n","    </tr>\n","    <tr>\n","      <td>145</td>\n","      <td>1.393800</td>\n","    </tr>\n","    <tr>\n","      <td>146</td>\n","      <td>1.894400</td>\n","    </tr>\n","    <tr>\n","      <td>147</td>\n","      <td>1.331200</td>\n","    </tr>\n","    <tr>\n","      <td>148</td>\n","      <td>1.519400</td>\n","    </tr>\n","    <tr>\n","      <td>149</td>\n","      <td>1.926300</td>\n","    </tr>\n","    <tr>\n","      <td>150</td>\n","      <td>1.293200</td>\n","    </tr>\n","    <tr>\n","      <td>151</td>\n","      <td>1.135100</td>\n","    </tr>\n","    <tr>\n","      <td>152</td>\n","      <td>1.066700</td>\n","    </tr>\n","    <tr>\n","      <td>153</td>\n","      <td>0.856900</td>\n","    </tr>\n","    <tr>\n","      <td>154</td>\n","      <td>1.021500</td>\n","    </tr>\n","    <tr>\n","      <td>155</td>\n","      <td>0.808800</td>\n","    </tr>\n","    <tr>\n","      <td>156</td>\n","      <td>0.936300</td>\n","    </tr>\n","    <tr>\n","      <td>157</td>\n","      <td>0.979700</td>\n","    </tr>\n","    <tr>\n","      <td>158</td>\n","      <td>1.100200</td>\n","    </tr>\n","    <tr>\n","      <td>159</td>\n","      <td>1.091400</td>\n","    </tr>\n","    <tr>\n","      <td>160</td>\n","      <td>0.918800</td>\n","    </tr>\n","    <tr>\n","      <td>161</td>\n","      <td>1.370800</td>\n","    </tr>\n","    <tr>\n","      <td>162</td>\n","      <td>1.380300</td>\n","    </tr>\n","    <tr>\n","      <td>163</td>\n","      <td>0.965300</td>\n","    </tr>\n","    
<tr>\n","      <td>164</td>\n","      <td>1.142400</td>\n","    </tr>\n","    <tr>\n","      <td>165</td>\n","      <td>1.436400</td>\n","    </tr>\n","    <tr>\n","      <td>166</td>\n","      <td>0.970400</td>\n","    </tr>\n","    <tr>\n","      <td>167</td>\n","      <td>0.872600</td>\n","    </tr>\n","    <tr>\n","      <td>168</td>\n","      <td>1.662500</td>\n","    </tr>\n","    <tr>\n","      <td>169</td>\n","      <td>1.623500</td>\n","    </tr>\n","    <tr>\n","      <td>170</td>\n","      <td>1.481700</td>\n","    </tr>\n","    <tr>\n","      <td>171</td>\n","      <td>0.822300</td>\n","    </tr>\n","    <tr>\n","      <td>172</td>\n","      <td>1.605500</td>\n","    </tr>\n","    <tr>\n","      <td>173</td>\n","      <td>1.769800</td>\n","    </tr>\n","    <tr>\n","      <td>174</td>\n","      <td>1.320100</td>\n","    </tr>\n","    <tr>\n","      <td>175</td>\n","      <td>0.969300</td>\n","    </tr>\n","    <tr>\n","      <td>176</td>\n","      <td>0.798700</td>\n","    </tr>\n","    <tr>\n","      <td>177</td>\n","      <td>1.233200</td>\n","    </tr>\n","    <tr>\n","      <td>178</td>\n","      <td>1.168500</td>\n","    </tr>\n","    <tr>\n","      <td>179</td>\n","      <td>1.251400</td>\n","    </tr>\n","    <tr>\n","      <td>180</td>\n","      <td>1.221500</td>\n","    </tr>\n","    <tr>\n","      <td>181</td>\n","      <td>1.491100</td>\n","    </tr>\n","    <tr>\n","      <td>182</td>\n","      <td>1.010200</td>\n","    </tr>\n","    <tr>\n","      <td>183</td>\n","      <td>1.375500</td>\n","    </tr>\n","    <tr>\n","      <td>184</td>\n","      <td>1.722900</td>\n","    </tr>\n","    <tr>\n","      <td>185</td>\n","      <td>1.179300</td>\n","    </tr>\n","    <tr>\n","      <td>186</td>\n","      <td>1.474400</td>\n","    </tr>\n","    <tr>\n","      <td>187</td>\n","      <td>1.968200</td>\n","    </tr>\n","    <tr>\n","      <td>188</td>\n","      <td>1.297200</td>\n","    </tr>\n","    <tr>\n","      <td>189</td>\n","      
<td>1.564500</td>\n","    </tr>\n","    <tr>\n","      <td>190</td>\n","      <td>1.480700</td>\n","    </tr>\n","    <tr>\n","      <td>191</td>\n","      <td>1.464700</td>\n","    </tr>\n","    <tr>\n","      <td>192</td>\n","      <td>1.901400</td>\n","    </tr>\n","    <tr>\n","      <td>193</td>\n","      <td>1.620100</td>\n","    </tr>\n","    <tr>\n","      <td>194</td>\n","      <td>1.509000</td>\n","    </tr>\n","    <tr>\n","      <td>195</td>\n","      <td>1.587000</td>\n","    </tr>\n","    <tr>\n","      <td>196</td>\n","      <td>1.510000</td>\n","    </tr>\n","    <tr>\n","      <td>197</td>\n","      <td>1.773900</td>\n","    </tr>\n","    <tr>\n","      <td>198</td>\n","      <td>1.473200</td>\n","    </tr>\n","    <tr>\n","      <td>199</td>\n","      <td>1.660400</td>\n","    </tr>\n","    <tr>\n","      <td>200</td>\n","      <td>1.832600</td>\n","    </tr>\n","    <tr>\n","      <td>201</td>\n","      <td>1.021400</td>\n","    </tr>\n","    <tr>\n","      <td>202</td>\n","      <td>1.120400</td>\n","    </tr>\n","    <tr>\n","      <td>203</td>\n","      <td>1.030200</td>\n","    </tr>\n","    <tr>\n","      <td>204</td>\n","      <td>1.167500</td>\n","    </tr>\n","    <tr>\n","      <td>205</td>\n","      <td>0.853200</td>\n","    </tr>\n","    <tr>\n","      <td>206</td>\n","      <td>0.927000</td>\n","    </tr>\n","    <tr>\n","      <td>207</td>\n","      <td>1.157400</td>\n","    </tr>\n","    <tr>\n","      <td>208</td>\n","      <td>1.071600</td>\n","    </tr>\n","    <tr>\n","      <td>209</td>\n","      <td>1.195400</td>\n","    </tr>\n","    <tr>\n","      <td>210</td>\n","      <td>1.155800</td>\n","    </tr>\n","    <tr>\n","      <td>211</td>\n","      <td>1.502300</td>\n","    </tr>\n","    <tr>\n","      <td>212</td>\n","      <td>1.091600</td>\n","    </tr>\n","    <tr>\n","      <td>213</td>\n","      <td>1.225200</td>\n","    </tr>\n","    <tr>\n","      <td>214</td>\n","      <td>1.148900</td>\n","    </tr>\n","    <tr>\n"," 
     <td>215</td>\n","      <td>1.238200</td>\n","    </tr>\n","    <tr>\n","      <td>216</td>\n","      <td>1.600200</td>\n","    </tr>\n","    <tr>\n","      <td>217</td>\n","      <td>1.203600</td>\n","    </tr>\n","    <tr>\n","      <td>218</td>\n","      <td>1.266200</td>\n","    </tr>\n","    <tr>\n","      <td>219</td>\n","      <td>0.970900</td>\n","    </tr>\n","    <tr>\n","      <td>220</td>\n","      <td>1.451000</td>\n","    </tr>\n","    <tr>\n","      <td>221</td>\n","      <td>1.281300</td>\n","    </tr>\n","    <tr>\n","      <td>222</td>\n","      <td>0.952500</td>\n","    </tr>\n","    <tr>\n","      <td>223</td>\n","      <td>1.313800</td>\n","    </tr>\n","    <tr>\n","      <td>224</td>\n","      <td>0.915700</td>\n","    </tr>\n","    <tr>\n","      <td>225</td>\n","      <td>1.040000</td>\n","    </tr>\n","    <tr>\n","      <td>226</td>\n","      <td>1.493800</td>\n","    </tr>\n","    <tr>\n","      <td>227</td>\n","      <td>1.186400</td>\n","    </tr>\n","    <tr>\n","      <td>228</td>\n","      <td>1.278700</td>\n","    </tr>\n","    <tr>\n","      <td>229</td>\n","      <td>1.061100</td>\n","    </tr>\n","    <tr>\n","      <td>230</td>\n","      <td>1.209000</td>\n","    </tr>\n","    <tr>\n","      <td>231</td>\n","      <td>0.881400</td>\n","    </tr>\n","    <tr>\n","      <td>232</td>\n","      <td>1.659300</td>\n","    </tr>\n","    <tr>\n","      <td>233</td>\n","      <td>1.135200</td>\n","    </tr>\n","    <tr>\n","      <td>234</td>\n","      <td>1.497800</td>\n","    </tr>\n","    <tr>\n","      <td>235</td>\n","      <td>1.557500</td>\n","    </tr>\n","    <tr>\n","      <td>236</td>\n","      <td>0.849200</td>\n","    </tr>\n","    <tr>\n","      <td>237</td>\n","      <td>1.329200</td>\n","    </tr>\n","    <tr>\n","      <td>238</td>\n","      <td>1.147700</td>\n","    </tr>\n","    <tr>\n","      <td>239</td>\n","      <td>1.764600</td>\n","    </tr>\n","    <tr>\n","      <td>240</td>\n","      
<td>1.740000</td>\n","    </tr>\n","    <tr>\n","      <td>241</td>\n","      <td>2.043700</td>\n","    </tr>\n","    <tr>\n","      <td>242</td>\n","      <td>1.675000</td>\n","    </tr>\n","    <tr>\n","      <td>243</td>\n","      <td>1.809600</td>\n","    </tr>\n","    <tr>\n","      <td>244</td>\n","      <td>1.721400</td>\n","    </tr>\n","    <tr>\n","      <td>245</td>\n","      <td>2.343300</td>\n","    </tr>\n","    <tr>\n","      <td>246</td>\n","      <td>1.830400</td>\n","    </tr>\n","    <tr>\n","      <td>247</td>\n","      <td>1.754400</td>\n","    </tr>\n","    <tr>\n","      <td>248</td>\n","      <td>1.741900</td>\n","    </tr>\n","    <tr>\n","      <td>249</td>\n","      <td>2.011000</td>\n","    </tr>\n","    <tr>\n","      <td>250</td>\n","      <td>1.741700</td>\n","    </tr>\n","  </tbody>\n","</table><p>"]},"metadata":{}}]},{"cell_type":"code","source":["# %load_ext tensorboard\n","# %tensorboard --logdir results/runs"],"metadata":{"id":"crj9svNe4hU5"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# Ignore warnings\n","logging.set_verbosity(logging.CRITICAL)\n","\n","# Run text generation pipeline with our next model\n","prompt = \"What is a large language model?\"\n","pipe = pipeline(task=\"text-generation\", model=model, tokenizer=tokenizer, max_length=200)\n","result = pipe(f\"<s>[INST] {prompt} [/INST]\")\n","print(result[0]['generated_text'])"],"metadata":{"id":"frlSLPin4IJ4","colab":{"base_uri":"https://localhost:8080/"},"outputId":"e5bf6b3a-f20e-49f7-e0b7-36f71ca207c1"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stderr","text":["/usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py:1270: UserWarning: You have modified the pretrained model configuration to control generation. This is a deprecated strategy to control generation and will be removed soon, in a future version. 
Please use a generation configuration file (see https://huggingface.co/docs/transformers/main_classes/text_generation )\n","  warnings.warn(\n","/usr/local/lib/python3.10/dist-packages/torch/utils/checkpoint.py:31: UserWarning: None of the inputs have requires_grad=True. Gradients will be None\n","  warnings.warn(\"None of the inputs have requires_grad=True. Gradients will be None\")\n"]},{"output_type":"stream","name":"stdout","text":["<s>[INST] What is a large language model? [/INST] A large language model is a type of artificial intelligence (AI) model that is trained on a large dataset of text to generate human-like language outputs. It is designed to be able to understand and generate text in a way that is similar to human language, and can be used for a wide range of applications such as chatbots, language translation, and text summarization.\n","\n","Large language models are typically trained using deep learning techniques, such as recurrent neural networks (RNNs) or transformer models, and are often based on pre-trained models such as BERT or RoBERTa. 
These models are trained on large datasets of text, such as books, articles, or websites, and are designed to learn the patterns and structures of language.\n","\n","Some examples of large language models include:\n","\n","* BERT (Bidirectional Encoder Representations from Transformers\n"]}]},{"cell_type":"code","source":["# Empty VRAM\n","del model\n","del pipe\n","del trainer\n","import gc\n","gc.collect()\n","gc.collect()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"mkQCviG0Zta-","outputId":"e7c4ab10-4039-4490-b7f0-6ea118bdd709"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["19965"]},"metadata":{},"execution_count":7}]},{"cell_type":"code","source":["# Reload model in FP16 and merge it with LoRA weights\n","base_model = AutoModelForCausalLM.from_pretrained(\n","    model_name,\n","    low_cpu_mem_usage=True,\n","    return_dict=True,\n","    torch_dtype=torch.float16,\n","    device_map=device_map,\n",")\n","model = PeftModel.from_pretrained(base_model, new_model)\n","model = model.merge_and_unload()\n","\n","# Reload tokenizer to save it\n","tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)\n","tokenizer.pad_token = tokenizer.eos_token\n","tokenizer.padding_side = \"right\""],"metadata":{"id":"QQn30cRtAZ-P","colab":{"base_uri":"https://localhost:8080/","height":49,"referenced_widgets":["051d193cd87f47c1971fb87544e1e615","9d7247c119e642c5894f15ca6974ef3e","a79c22bb34ec4f698a00752b47a6f631","d95f3a3f26c6470d984542cdfd68bec1","343e11c62a59448eb43bbc0c31bf5f11","a153c96bd1fe4c48a41e9b9c7c00dd6e","84da055d24694320843e13ad37438792","e375632975904402baea46163e2eeca1","95501d0b5a22407288f008bf8cc69726","6aef866a6c474dfabb2140ded933c5aa","d66fa096d442423c9447cbfbdc1aad8d"]},"outputId":"1c5ef3c4-d107-4c43-9bd6-0ca72903db0e"},"execution_count":null,"outputs":[{"output_type":"display_data","data":{"text/plain":["Loading checkpoint shards:   0%|          | 0/2 [00:00<?, 
?it/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"051d193cd87f47c1971fb87544e1e615"}},"metadata":{}}]},{"cell_type":"code","source":["!huggingface-cli login\n","\n","model.push_to_hub(new_model, use_temp_dir=False)\n","tokenizer.push_to_hub(new_model, use_temp_dir=False)"],"metadata":{"id":"x-xPb-_qB0dz","colab":{"base_uri":"https://localhost:8080/","height":373,"referenced_widgets":["c99aff4cfd664ae8a165a27bea0566c8","e4b64cab6b7b418c8a2575ee26839039","c3a4fedc73b3480089ef9d13381471ed","bf722f71c61b4285bcbbf32fd619b3a6","fd11a6148b704c5b9142c5e8de2d3b25","f0bcdaf940d14ad796fc7ac46c8e1e64","b6e821c974674f2290c354238d6c919c","eeba50e8242c4753bfc0ea48e03f9078","7a1f3340688d408092adade75f4baac4","8c887ca9b0eb44fdb8608bf36b5db5c5","e4698337e6b843afac706ab657ca6af9","1af01f1f1aac42b8bff46fe4df8a59ad","eee8731f316244eda5ff0765fd12bf85","f135278e410f4b708435bb80fb630bcf","2e6fc79bf5c149d6b0bc5c52e18debc7","a4b0debc025444a59abd6953b3512c0d","130120644beb48acbc038651459af43c","bf77e97593a349718bdb5fd9bfd28fe3","f7292741953e47699540ef8712fc0d8d","9434350b1b9c4060812feb9ecbf63278","b29647e268414329be56047e522e28b9","27bb18a199ca47108c7a61e9c443de36","33ebb868f3e846f6af1a1a2a8ad6a3cb","1f73f8b4d4da4e74adc135f2a2f6ee65","68da6e6e69c8419895bea2068760534e","6dc1a868e08c4c3b8315116d2c46573b","7a5d714c17374104bb6f5caaa5541c10","1b6c59a51359453c926bfcddb3d0f0ea","dac3669f18284161a58d52f26dffb761","a3511f489f6d47cc8d404ab6f367b29f","20670478612f4b1a8a5f23d71a2609a7","b463153ec04749e38540389efa2981f7","2bb3d36d248a48fba364f14d9e840306"]},"outputId":"6ed9166c-5f92-4375-eca5-dbb247c0e13a"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["\n","    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|\n","    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|\n","    _|_|_|_|  _|    _|  _|  
_|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|\n","    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|\n","    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|\n","    \n","    To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .\n","Token: \n","Add token as git credential? (Y/n) n\n","Token is valid (permission: write).\n","Your token has been saved to /root/.cache/huggingface/token\n","Login successful\n"]},{"output_type":"display_data","data":{"text/plain":["Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"c99aff4cfd664ae8a165a27bea0566c8"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["pytorch_model-00001-of-00002.bin:   0%|          | 0.00/9.98G [00:00<?, ?B/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"1af01f1f1aac42b8bff46fe4df8a59ad"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["pytorch_model-00002-of-00002.bin:   0%|          | 0.00/3.50G [00:00<?, ?B/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"33ebb868f3e846f6af1a1a2a8ad6a3cb"}},"metadata":{}},{"output_type":"execute_result","data":{"text/plain":["CommitInfo(commit_url='https://huggingface.co/mlabonne/llama-2-7b-miniguanaco/commit/c81a32fd0b4d39e252326e639d63e75aa68c9a4a', commit_message='Upload tokenizer', commit_description='', oid='c81a32fd0b4d39e252326e639d63e75aa68c9a4a', pr_url=None, pr_revision=None, pr_num=None)"]},"metadata":{},"execution_count":10}]}]}